import requests
from parsel import Selector
import time
import random

# Module-level accumulator: CollectShopList.parse_html() extends it with the
# hrefs extracted from each fetched page, and main() returns it to the caller.
SHOP_URL_LIST = []


class CollectShopList:
    """Fetch one listing page and collect the shop links found on it.

    The page at ``url`` is downloaded with browser-like headers and a fixed
    set of cookies, then the ``href`` of every ``div.pic > a`` inside the
    ``#shop-all-list`` list is appended to the module-level
    ``SHOP_URL_LIST``.

    Args:
        url: Address of the listing page to download.
        timeout: Seconds to wait for the server before ``requests`` gives up.
            Without a timeout a stalled connection would block the crawl
            indefinitely.

    Attributes:
        url: The page address passed to the constructor.
        timeout: The request timeout in seconds.
        headers: Per-instance copy of ``DEFAULT_HEADERS``.
        cookies: Per-instance copy of ``DEFAULT_COOKIES``.
        html: Body of the last downloaded page ("" until ``get_html`` runs).
    """

    # Request headers sent with every page fetch. Cookies are passed through
    # the ``cookies`` argument of ``requests.get`` rather than a raw
    # ``Cookie`` header.
    DEFAULT_HEADERS = {
        'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7',
        'Accept-Language': 'zh-CN,zh;q=0.9,en;q=0.8,en-GB;q=0.7,en-US;q=0.6',
        'Cache-Control': 'max-age=0',
        'Connection': 'keep-alive',
        'Referer': 'https://www.dianping.com/',
        'Sec-Fetch-Dest': 'document',
        'Sec-Fetch-Mode': 'navigate',
        'Sec-Fetch-Site': 'same-origin',
        'Sec-Fetch-User': '?1',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0',
        'sec-ch-ua': '"Not_A Brand";v="8", "Chromium";v="120", "Microsoft Edge";v="120"',
        'sec-ch-ua-mobile': '?0',
        'sec-ch-ua-platform': '"Windows"',
    }

    # NOTE(review): these look like values captured from a logged-in browser
    # session; they are hard-coded here and will presumably expire. Consider
    # loading them from configuration instead of source control.
    DEFAULT_COOKIES = {
        '_lx_utm': 'utm_source%3Dbing%26utm_medium%3Dorganic',
        '_lxsdk_cuid': '182117ddf1fc8-0174e6028847dd-4c647e53-384000-182117ddf1fc8',
        '_lxsdk': '182117ddf1fc8-0174e6028847dd-4c647e53-384000-182117ddf1fc8',
        'WEBDFPID': '7x93317yu81w5vz0054wwy257y03y0318107z1z73xy97958702wx99w-2003995845181-1688635843949KIKGKAEfd79fef3d01d5e9aadc18ccd4d0c95073644',
        '_hc.v': '1cde0cce-614f-4662-ba6d-60cd73fead9b.1708049442',
        'qruuid': '71ae4647-4e1d-4574-9167-dd71e2cb17b6',
        'dper': '0202c9258c9828f11698a95f036519d5a08e9077cfb20c7b4f2dc8c79b5f86344a6d2138b43d20dade0d8beba3c9dab080d62914cdbe35a9c86300000000ff1d00007e8b778087fe8b54ef676252daf73bc1128e374c89433a8c026d8d80eb05c8e2de873770f8cff79e5737aa6225ebfe1e',
        'fspop': 'test',
        'cy': '344',
        'cye': 'changsha',
        's_ViewType': '10',
        'll': '7fd06e815b796be3df069dec7836c3df',
        'Hm_lvt_602b80cf8079ae6591966cc70a3940e7': '1708049478,1708565701,1708574157',
        'Hm_lpvt_602b80cf8079ae6591966cc70a3940e7': '1708574191',
        '_lxsdk_s': '18dcef4a5b9-888-625-29d%7C%7C48',
    }

    def __init__(self, url, timeout=10):
        self.url = url
        self.timeout = timeout
        # Copy the defaults so that tweaking one spider's headers or cookies
        # never leaks into other instances.
        self.headers = dict(self.DEFAULT_HEADERS)
        self.cookies = dict(self.DEFAULT_COOKIES)
        self.html = ""

    def get_html(self):
        """Download ``self.url`` and store the decoded body in ``self.html``.

        Raises:
            requests.exceptions.RequestException: On connection problems or
                when the server does not answer within ``self.timeout``.
        """
        response = requests.get(
            url=self.url,
            headers=self.headers,
            cookies=self.cookies,
            timeout=self.timeout,
        )
        # NOTE(review): the HTTP status is not checked, so an error page is
        # stored like any other response and simply yields no links later.
        # Force UTF-8 instead of relying on the encoding requests guesses.
        response.encoding = "utf-8"
        self.html = response.text

    def parse_html(self):
        """Extract the link hrefs from ``self.html``.

        The hrefs are appended to the module-level ``SHOP_URL_LIST``.

        Returns:
            The list of hrefs found on this page (empty when none match).
        """
        selector = Selector(text=self.html)
        url_list = selector.xpath(
            '//div[@id="shop-all-list"]/ul/li/div[@class="pic"]/a/@href'
        ).getall()

        # The list is mutated in place, so no ``global`` declaration is needed.
        SHOP_URL_LIST.extend(url_list)
        return url_list

    def run(self):
        """Download the page, parse it, and return the hrefs it contained."""
        self.get_html()
        return self.parse_html()


def main(start_page=1, end_page=50):
    """Crawl a range of listing pages and return the collected shop URLs.

    The original loop ended with an unconditional ``break``, so only the
    first page was ever fetched even though the range covered 50 pages.
    That ``break`` is removed and the page range is now configurable.

    Args:
        start_page: First page number to fetch (inclusive).
        end_page: Last page number to fetch (inclusive).

    Returns:
        The module-level ``SHOP_URL_LIST``. It is shared state, so hrefs
        gathered by earlier calls are still in it.
    """
    for page in range(start_page, end_page + 1):
        print(f"正在爬取第{page}页")
        url = f"https://www.dianping.com/changsha/ch10/p{page}"
        spider = CollectShopList(url)
        spider.run()
        # Pause a random 1-2 s between requests; there is nothing left to
        # wait for after the final page.
        if page != end_page:
            time.sleep(random.uniform(1, 2))

    return SHOP_URL_LIST


if __name__ == '__main__':
    # Script entry point: fetch listing pages 1 through 3 one after another,
    # then print every href accumulated in SHOP_URL_LIST.
    page = 1
    while page < 4:
        print(f"正在爬取第{page}页")
        CollectShopList(f"https://www.dianping.com/changsha/ch10/p{page}").run()
        # Wait a random 1-2 seconds after each page.
        delay = random.uniform(1, 2)
        time.sleep(delay)
        page += 1

    print(SHOP_URL_LIST)
